/////////////////////////////////////////////////////////////////////////////////////////////		
* This do-file generates Table 7; Table 8; and Table A.10
/////////////////////////////////////////////////////////////////////////////////////////////	
 
* Load the cleaned analysis data, keep rows with interviews == 2, and build the
* lagged-outcome and attendance variables that the rest of the file relies on.
use "${inputdir}/prospects_analysis_dataset_clean.dta", clear

	drop if interviews!=2

	* Lagged outcomes, copied from lfi1 and psi_std1 before the reshape to long form
	gen lfi_L1 = lfi1
	drop working

	gen psi_L1 = psi_std1

	* Attendance measures.
	* Stata ranks missing above every number, so a bare comparison against 0 would
	* code a missing attendance record as 1 (attended). The missing() guard keeps
	* those rows missing instead.
	gen everattend = sfc_totalAttendance > 0 if !missing(sfc_totalAttendance)
	replace everattend = . if SFC2!=1
	gen sfc_totdays = sfc_totalAttendance
	gen totattend = sfc_totalAttendance

	* Keep only the variables used downstream (female was listed twice; once is enough)
	keep female reg_age age2 r1_HighGrade notraining sfc_totdays tf anyfriends totattend everattend lfi1anc commid BOTH sfc_any *_L1  ///
		  psi* lfi* sfcgroupid ANYSFC respid r2_consent
	
	* Move from one row per respondent to one row per respondent and time value;
	* the numeric suffix of the lfi / psi_std variables becomes the variable time.
	reshape long lfi psi_std, i(respid) j(time)

	* Treatment-side variables are set to zero on time==1 rows
	replace sfc_any = 0 if time==1
	generate SFC = (time==2) * ANYSFC
	replace sfc_totdays = 0 if time==1

	* Respondent-level maximum of sfc_any, repeated on every row of that respondent
	bysort respid: egen anysfc = max(sfc_any)
	
	* Create bins of similar age/gender.
	* young = 1 when reg_age is at or below its median, 0 otherwise.
	* NOTE(review): a missing reg_age compares as larger than the median and is
	* therefore coded young = 0 - confirm this is intended.
	summarize reg_age, detail
	generate young = reg_age <=r(p50)
	generate yf = young==1 & female==1
	generate of = young==0 & female==1
	generate ym = young==1 & female==0
	generate om = young==0 & female==0
	generate old = young==0

	* For each bin, total the indicator over the time-1 rows of every sfcgroupid.
	* Rows with sfcgroupid above 30 (this includes a missing id) are set to zero.
	foreach bin in yf of ym om young old {
		generate temp = `bin' if time!=2
		bysort sfcgroupid: egen grp_`bin' = sum(temp)
		drop temp
		replace grp_`bin' = 0 if sfcgroupid>30
	}

	* Count for the respondent's own age x gender bin; stays missing when the
	* respondent falls in none of the four bins (for example female is missing)
	generate gr_match = .
	foreach bin in yf of ym om {
		replace gr_match = grp_`bin' if `bin'==1
	}

	* Count for the respondent's own age bin
	generate gr_agematch = .
	replace gr_agematch = grp_young if young==1
	replace gr_agematch = grp_old if old==1

	* Attendance is zero on time==1 rows, and so is its lagged counterpart everywhere
	foreach att in everattend totattend {
		replace `att' = 0 if time==1
	}
	foreach att in everattend totattend {
		generate `att'_L1 = 0
	}
		
	rename psi_std psi

	* Predicted Outcomes
	* Each outcome is regressed on the covariates below plus community dummies, using
	* only time-2 rows with SFC==0. The linear prediction is then computed without an
	* if-condition, so every row with non-missing covariates receives a value.
	* The training control is written out in full (notraining) so this block does not
	* depend on variable-name abbreviation being enabled.
	local controls2 "female reg_age age2 i.r1_HighGrade notraining"
	foreach oc in lfi psi {
		reg `oc' `controls2' i.commid if SFC==0 & time==2
		predict `oc'_hat, xb
		gen `oc'_badhat = -1 * `oc'_hat				// flip sign so you predict disadvantage
		sum `oc'_badhat, detail
		gen `oc'_hat_low = `oc'_badhat > r(p50)		// above-median predicted disadvantage
		replace `oc'_hat_low = . if `oc'_badhat ==.
		gen SFCx`oc'_badhat = `oc'_badhat * SFC
		gen SFCx`oc'_hat_low = `oc'_hat_low * SFC
	}

	* Flag rows whose predicted outcome lies below the median prediction;
	* rows without a prediction stay missing
	foreach oc in lfi psi {
		sum `oc'_hat, detail
		gen `oc'_disadv = `oc'_hat < r(p50)
		replace `oc'_disadv = . if `oc'_hat==.
	}

	* Snapshot of the prepared data in a temporary file
	tempfile prepdata
	save `prepdata'

/////////////////////////////////////////////////////////////////////////////////////////////		
* 									TABLE 7
/////////////////////////////////////////////////////////////////////////////////////////////	

* Covariate list referenced by the Table 8 / Table A.10 regressions later in this file
local controls1 "female reg_age age2 i.r1_HighGrade"

* The lagged LFI control is overwritten with lfi1anc; Y_L1 is a placeholder that the
* estimation loops fill with the lag of the current outcome
replace lfi_L1 = lfi1anc
generate Y_L1 = .

* Fixed-effect cells: community x age bin, and community x age bin x gender
egen commidxgrp1 = group(commid young)
egen commidxgrp2 = group(commid young female)

* Attendance is set to zero wherever SFC==0
foreach att in everattend totattend {
	replace `att' = 0 if SFC==0
}

* Interactions of the group-composition counts with old (= young==0) and female
generate gao = gr_agematch * old
generate gago = gr_match * old
generate gaf = gr_agematch * female
generate gagf = gr_match * female
generate gaof = gao * female
generate gagof = gago * female

* Interactions of SFC with female and old
generate SFCxfemale = SFC * female
generate SFCxold = SFC * old
generate SFCxfemalexold = SFC * female * old

* Table 7 estimation.
* Three specifications are run for every outcome and stored as <letter><column>:
*   a : age-match count, community x age-bin fixed effects
*   b : age-match and age-gender-match counts, community x age-bin x gender fixed effects
*   f : any-friends indicator and i.tf, community and r1_HighGrade fixed effects
* Column numbers follow the outcome order: 1 psi, 2 lfi, 3 everattend, 4 totattend.
local rhs_a "SFC gr_agematch Y_L1 female reg_age age2 i.r1_HighGrade"
local fe_a  "commidxgrp1"
local rhs_b "SFC gr_agematch gr_match  Y_L1 reg_age age2 i.r1_HighGrade"
local fe_b  "commidxgrp2"
local rhs_f "SFC anyfriends i.tf Y_L1 female reg_age age2"
local fe_f  "commid r1_HighGrade"

local col = 1
foreach oc in psi lfi everattend totattend {

	* Lag of the current outcome
	replace Y_L1 = `oc'_L1

	* inboth = 1 when the respondent has a non-missing outcome on two rows
	bysort respid: egen inboth = count(`oc')
	replace inboth = inboth==2

	foreach m in a b f {
		reghdfe `oc' `rhs_`m'' if time==2, absorb(`fe_`m'') vce(cluster sfcgroupid)
		estadd local tfe = "Yes"
		estadd ysumm
		summarize `oc' if time==2
		estadd local num = r(N)
		* Mean of the outcome among ANYSFC==0 respondents observed twice
		summarize `oc' if ANYSFC==0 & time==2 & inboth==1
		estadd scalar cm = r(mean)
		* p-value of the test that the SFC coefficient is zero
		test SFC=0
		estadd scalar pval = r(p)
		eststo `m'`col'
	}

	drop inboth
	local ++col
}
		
		* Table 7 exports. Stored models are listed as 3 4 1 2 so that the columns read
		* everattend, totattend, psi, lfi, matching the Ever / Tot / PSI / LFI labels.

		* TABLE 7 PANEL A - any-friends specification (models f)
		esttab f3 f4 f1 f2 using "${outputdir}/tables/Table_7_panela.tex", replace ///
			b(%10.3f) se(%10.3f) ///
			scalars("ymean Mean Y" "cm Control Mean" "pval P-Val" "r2 R2") ///
			sfmt(%10.4f %10.4f %10.4f %10.4f %10.4f %10.4f) ///
			star(* 0.10 ** 0.05 *** 0.01) ///
			keep(SFC anyfriend*) ///
			order(SFC *gr* *L1) mlabels(Ever Tot PSI LFI) nogaps ///
			varlabel(anyfriends "SFC x Any Friends") ///
			align(cccccc) title(Effects of Group Cohesion, Panel (a)) ///
			note(Regressions include community FEs) nomtitles

		* TABLE 7 PANEL C - age-match specification (models a)
		esttab a3 a4 a1 a2 using "${outputdir}/tables/Table_7_panelc.tex", replace ///
			b(%10.3f) se(%10.3f) ///
			scalars("ymean Mean Y" "cm Control Mean" "pval P-Val" "r2 R2") ///
			sfmt(%10.4f %10.4f %10.4f %10.4f %10.4f %10.4f) ///
			star(* 0.10 ** 0.05 *** 0.01) ///
			keep(SFC *gr* *L1) ///
			order(SFC *gr* *L1) mlabels(Ever Tot PSI LFI) nogaps ///
			varlabel(gr_agematch "\# Grp Members w/ Sim Age" gr_match "\# Grp Members w/ Sim Age and Gender") ///
			align(cccccc) title(Effects of Group Cohesion, Panel (c)) ///
			note(Regressions include community x member type FEs) nomtitles

		* TABLE 7 PANEL D - age-match and age-gender-match specification (models b)
		esttab b3 b4 b1 b2 using "${outputdir}/tables/Table_7_paneld.tex", replace ///
			b(%10.3f) se(%10.3f) ///
			scalars("ymean Mean Y" "cm Control Mean" "pval P-Val" "r2 R2") ///
			sfmt(%10.4f %10.4f %10.4f %10.4f %10.4f %10.4f) ///
			star(* 0.10 ** 0.05 *** 0.01) ///
			keep(SFC *gr* *L1) ///
			order(SFC *gr* *L1) mlabels(Ever Tot PSI LFI) nogaps ///
			varlabel(gr_agematch "\# Grp Members w/ Sim Age" gr_match "\# Grp Members w/ Sim Age and Gender") ///
			align(cccccc) title(Effects of Group Cohesion, Panel (d)) ///
			note(Regressions include community x member type FEs) nomtitles
		
		
/////////////////////////////////////////////////////////////////////////////////////////////		
*								TABLE 8 and TABLE A.10								*
/////////////////////////////////////////////////////////////////////////////////////////////	
							
* Group and community head counts, taken over time-1 rows and then repeated on
* every row of the respondent
bys sfcgroupid: egen temp = count(sfcgroupid) if time==1
bys respid: egen grpsize = max(temp)
drop temp

bys commid: egen temp = count(commid) if time==1
bys respid: egen commsize = max(temp)
drop temp

* Leave-out means of the time-1 outcomes.
* The mean is taken over non-missing values only, so the total is rebuilt with the
* number of non-missing values rather than the head count (grpsize / commsize);
* with the head count the leave-out mean is wrong whenever a member has a missing
* time-1 outcome. With no missing values both versions agree.
* Created per outcome: _grpmean, _lomean, _lomean_adj2 (the last two are identical).
foreach v in lfi psi{
	bys sfcgroupid: egen temp = mean(`v') if time==1				// group means
	bys sfcgroupid: egen tempn = count(`v') if time==1				// non-missing values per group
	bys commid: egen tempx = mean(`v') if time==1					// community means
	bys commid: egen tempxn = count(`v') if time==1					// non-missing values per community
	gen temp2 = ((temp * tempn) - `v') / (tempn - 1)				// leave out mean removing own value from group mean
	gen temp2z = ((tempx * tempxn) - `v') / (tempxn - 1)			// leave out community mean
	bys respid: egen `v'_grpmean = max(temp)
	bys respid: egen `v'_lomean = max(temp2)
	bys respid: egen tempz = max(temp2z)
	gen `v'_lomean_adj2 = `v'_lomean - tempz						// adjusting group leave out mean by community leave out mean
	* Rows with sfcgroupid above 30 (this includes a missing id) get zero
	* NOTE(review): presumably these are respondents without an SFC group - confirm
	replace `v'_grpmean = 0 if sfcgroupid>30
	replace `v'_lomean_adj2 = 0 if sfcgroupid>30
	* The adjusted version is the one used from here on
	replace `v'_lomean = `v'_lomean_adj2
	drop temp*
	}

* Placeholder for the leave-out mean regressor; the estimation loop fills it
generate glom = .

* Flag respondents whose time-1 value lies below the 75th percentile of their
* community x gender cell. The cutoff variable carries the prefix med_ although
* the percentile requested is the 75th.
foreach y in psi lfi {
	generate temp = `y' if time!=2
	bysort commid female: egen med_`y' = pctile(temp), p(75)
	generate bad_temp = `y' < med_`y' if time!=2
	bysort respid: egen bad_`y' = max(bad_temp)
	drop *temp*
}

* Interactions with the below-median predicted-outcome flags
foreach y in psi lfi {
	generate glomx`y'_disadv = `y'_lomean * `y'_disadv
	generate SFCx`y'_disadv = `y'_disadv * SFC
}

* Table 8 / Table A.10 estimation.
* Outer loop: outcome (column t = 1 everattend, 2 totattend, 3 psi, 4 lfi).
* Inner loop: which leave-out mean enters as glom (e = 1 psi, e = 2 lfi).
* Stored models: o<t>_p<e> (baseline) and lfi<t>_p<e> (low predicted LFI heterogeneity).
local t = 1
foreach oc in everattend totattend psi lfi {

	* These two steps depend only on the outcome, so they run once per outcome
	replace Y_L1 = `oc'_L1
	bys respid: egen inboth = count(`oc')
	replace inboth = inboth==2

	local e = 1
	foreach p in psi lfi{

		replace glom = `p'_lomean
		* Rebuild the interaction from the leave-out mean currently held in glom.
		* It was previously fixed at the LFI leave-out mean, so the PSI models
		* (e = 1, exported to Table A.10) paired a PSI main effect with an
		* LFI-based interaction.
		replace glomxlfi_disadv = glom * lfi_disadv

		* Main core results
		reghdfe `oc' SFC glom lfi_L1 psi_L1 `controls1'  if time==2, absorb(commid) vce(cluster sfcgroupid)
		estadd loc tfe = "Yes"
		estadd ysumm
		sum `oc' if time==2
		estadd loc num = r(N)
		sum `oc' if ANYSFC==0 & time==2 & inboth==1
		estadd scalar cm = r(mean)
		test SFC=0
		estadd scalar pval = r(p)
		eststo o`t'_p`e'

		* Main core results	- LFI Disadv Heterogeneity
		reghdfe `oc' SFC SFCxlfi_disadv glom glomxlfi_disadv lfi_disadv lfi_L1  psi_L1 `controls1'  if time==2, absorb(commid) vce(cluster sfcgroupid)
		estadd loc tfe = "Yes"
		estadd ysumm
		sum `oc' if time==2
		estadd loc num = r(N)
		sum `oc' if ANYSFC==0 & time==2 & inboth==1
		estadd scalar cm = r(mean)
		test SFC=0
		estadd scalar pval = r(p)
		eststo lfi`t'_p`e'

		local e = `e' + 1
		}

	drop inboth
	local t = `t' + 1
	}
		
		* Table 8 and Table A.10 exports. Columns 1-4 are everattend, totattend, psi, lfi;
		* suffix _p1 uses the PSI leave-out mean as glom and _p2 the LFI leave-out mean.

		* TABLE 8 PANEL A
		esttab o1_p1 o2_p1 o3_p1 o4_p1 using "${outputdir}/tables/Table_8_panela.tex", replace ///
			b(%10.3f) se(%10.3f) ///
			scalars("ymean Mean Y" "cm Control Mean" "pval P-Val" "r2 R2") ///
			sfmt(%10.4f %10.4f %10.4f %10.4f %10.4f %10.4f) ///
			star(* 0.10 ** 0.05 *** 0.01) keep(SFC *lom* *L1) ///
			order() mlabels(Ever Tot PSI LFI) nogaps ///
			varlabel(glom "Group Leave Out Mean" lfi_L1 "LFI (Lagged)" psi_L1 "PSI (Lagged)") ///
			align(cccccc) title(Leave Out Mean Effects by Baseline PSI) nonotes nomtitles

		* TABLE 8 PANEL B
		esttab o1_p2 o2_p2 o3_p2 o4_p2 using "${outputdir}/tables/Table_8_panelb.tex", replace ///
			b(%10.3f) se(%10.3f) ///
			scalars("ymean Mean Y" "cm Control Mean" "pval P-Val" "r2 R2") ///
			sfmt(%10.4f %10.4f %10.4f %10.4f %10.4f %10.4f) ///
			star(* 0.10 ** 0.05 *** 0.01) keep(SFC *lom* *L1) ///
			order() mlabels(Ever Tot PSI LFI) nogaps ///
			varlabel(glom "Group Leave Out Mean" lfi_L1 "LFI (Lagged)" psi_L1 "PSI (Lagged)") ///
			align(cccccc) title(Leave Out Mean Effects by Baseline LFI) nonotes nomtitles

		* APPENDIX TABLE A10
		esttab lfi1_p1 lfi2_p1 lfi3_p1 lfi4_p1 using "${outputdir}/tables/Table_A10.tex", replace ///
			b(%10.3f) se(%10.3f) ///
			scalars("ymean Mean Y" "cm Control Mean" "pval P-Val" "r2 R2") ///
			sfmt(%10.4f %10.4f %10.4f %10.4f %10.4f %10.4f) ///
			star(* 0.10 ** 0.05 *** 0.01) keep(SFC *lom* *dis* *L1) ///
			order() mlabels(Ever Tot PSI LFI) nogaps ///
			varlabel(SFCxlfi_disadv "SFC x Low Pred LFI" glom "Group Leave Out Mean" glomxlfi_disadv "Group Leave Out mean x Low Pred LFI" lfi_disadv "Low Pred LFI" lfi_L1 "LFI - Lagged" psi_L1 "PSI - Lagged") ///
			align(cccccc) title(Baseline PSI Leave Out Mean Heterogeneity on Low Predicted LFI) nonotes nomtitles
		
